###############################################################
# Replication Materials for "Collective Action Infrastructure:
# The Downstream Effects of Urban Neighborhood Organizing" 
# Date created: Jan 23, 2022
# Date last updated: Jan 31, 2022
# Last updated by: AJH
# This script uses the cleaned data. For data processing, see replication_preprocessing
###############################################################
library(tidyverse)
library(janitor)
library(stargazer)
library(sf)
library(lfe)
library(alpaca)

#############
# Loading Data 
#############
# read in the survey data
load("cai_survey.Rdata")

#######################################
# Summary Statistics and Plots
#######################################

  ###########################  
  # Survey representativeness
  ###########################
  # How many neighborhoods are represented in the survey?
  table(dat$alcaldia) # all 16 alcaldias are represented
  length(unique(dat$cve_col)) # 744 colonias are represented
  dat %>% group_by(cve_col) %>% tally %>% summarize(mean(n)) # mean number of responses from each colonia is 2.27

   # read in population level data from census
   pop <- read_csv("2_Data/census/population.csv") %>% 
      filter(data == "Población") %>% 
      filter(alcaldia !="Total") %>% 
      mutate(cve_alc = substr(alcaldia, 1,3))
    pop$pop <- as.numeric(str_replace(pop$Total, " ", ""))
    pop$share <- pop$pop/ 9209944
    pop <- pop %>% select(cve_alc,pop, share)
    pop$class <- "population"
    alc_codes <-  read_csv("2_Data/alcaldias.csv") %>% 
      rename(alcaldia = nomgeo)
    alc_codes$cve_alc <- str_pad(alc_codes$cve_mun,3,"left","0")
    pop <- left_join(alc_codes[,c("cve_alc", "alcaldia")], pop, by = "cve_alc")
    
    alc <- dat %>% 
      filter(last_answer>13) %>% 
      group_by(alcaldia) %>% 
      tally() %>% 
      mutate(share = n/1690) %>% 
      filter(!is.na(alcaldia)) %>% 
      ungroup()
    alc$class <- "survey"
    alc[alc$alcaldia=="Cuajimalpa",]$alcaldia <- "Cuajimalpa de Morelos"
    alc[alc$alcaldia=="Gustavo A Madero",]$alcaldia <- "Gustavo A. Madero"
    
    alc_new <- rbind(alc[,c("alcaldia", "share", "class")], pop[,c("alcaldia", "share", "class")])
    
    pdf(file = "3_Output/Plots/summary/alcaldia_representativeness.pdf", height = 6, width = 9)
    ggplot(alc_new) + 
      geom_bar(aes(x= alcaldia, y= share, fill= class, group = class),stat= "identity", position = "dodge")+
      theme_classic()+
      scale_fill_grey()+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+
      labs(title = "Alcaldia Representation: Survey vs. 2020 Census",
           fill = "", x= "", y = "")+
      theme(legend.position = "bottom", axis.text = element_text(size = 15), title=element_text(size=20)) 
    dev.off()
    
    # age
    age <- read_csv("2_Data/census/age_total_cdmx.csv")
    age$age <- as.numeric(substr(age$years,1,2))
    age$num <- as.numeric(str_replace(age$num," ", ""))
    weighted.mean(age[age$age>18,]$age, w= age[age$age>18,]$num)
    
    
    dat$age_cat <- case_when(
      dat$age > 18 & dat$age <=25 ~ "18-25", 
      dat$age >25& dat$age <=40 ~ "26-40", 
      dat$age >40 & dat$age <=55 ~ "40-55",
      dat$age >55 & dat$age <=70 ~ "55-70",
      dat$age >70 ~"70+",
      TRUE ~ NA_character_
    ) 
    
    survey_age <- dat %>% group_by(age_cat) %>% tally() %>% 
      mutate(share=n/1690)
    survey_age$class <- "survey"
    
    age$age_cat <- case_when(
      age$age > 18 & age$age <=25 ~ "18-25", 
      age$age >25& age$age <=40 ~ "26-40", 
      age$age >40 & age$age <=55 ~ "40-55",
      age$age >55 & age$age <=70 ~ "55-70",
      age$age >70 ~"70+",
      TRUE ~ NA_character_
    ) 
    pop_age <- age %>% 
      group_by(age_cat) %>% 
      mutate(share=sum(num)/9209944)
    pop_age$class <- "population"
    
    pdf(file = "3_Output/Plots/summary/age_representativeness.pdf", height = 6, width = 9)
    age_new <- bind_rows(pop_age, survey_age[,c("age_cat", "share", "class")],)
    ggplot(age_new[!is.na(age_new$age_cat),]) + 
      geom_bar(aes(x= age_cat, y= share, fill= class, group = class),stat= "identity", position = "dodge")+
      theme_classic()+
      scale_fill_grey()+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+
      labs(title = "Age Representation: Survey vs. 2020 Census",
           fill = "", x= "", y = "")+
      theme(legend.position = "bottom", axis.text = element_text(size = 15), title=element_text(size=20))
    dev.off()
    mean(dat$age,na.rm=TRUE)
    
    #population
    #Male 4,404,927
    # Female: 4,805,017
    4805017/(4805017+4404927)
    
    ###########################  
    # Earthquake affected zone plot
    ###########################
    colonias <- read_sf('2_Data/coloniascdmx/coloniascdmx.shp')
    earthquake <- read_csv("2_Data/earthquake_2017/overlap_earthquake_colonias.csv")
    earthquake$in_zone <- ifelse(!is.na(earthquake$zona), 1,0)
    earthquake <- earthquake %>% group_by(cve_col) %>% dplyr::summarize(in_zone = max(in_zone))
    colonias_in_zone <- left_join( colonias, earthquake[,c("cve_col", "in_zone")], by = "cve_col")
    
    pdf(file = "3_Output/Plots/shocks/earthquake_zone.pdf")
    ggplot(colonias_in_zone) + geom_sf(aes(fill = factor(in_zone)))+
      theme_classic()+
      theme(legend.position = "bottom")+
      labs(title = "Affected Zone, 2017 earthquake", fill = "Affected Zone") 
    dev.off()
    
    rm(list=setdiff(ls(), "dat"))
    
  ###########################
  # Top Two Issues
  ###########################
    dat$problem_crime_toptwo <- ifelse(dat$problem_robberies_toptwo==1|
                                         dat$problem_drug_dealers_toptwo==1|
                                         dat$problem_violent_crime_toptwo==1,
                                       1,0)
    issue_means_top_two <- dat %>% summarize(
      construction = mean(problem_construction_toptwo, na.rm=TRUE),
      lighting = mean(problem_lighting_toptwo, na.rm=TRUE),
      water = mean(problem_water_toptwo, na.rm=TRUE), 
      flooding = mean(problem_flooding_toptwo, na.rm=TRUE),
      trash = mean(problem_trash_toptwo, na.rm=TRUE),
      #robberies = mean(problem_robberies_toptwo, na.rm=TRUE),
      roads = mean(problem_roads_toptwo, na.rm=TRUE),
      title = mean(problem_title_toptwo, na.rm=TRUE),
      #drug_dealers = mean(problem_drug_dealers_toptwo, na.rm=TRUE),
      drug_use = mean(problem_drug_use_toptwo, na.rm=TRUE),
      roads = mean(problem_drug_use_toptwo, na.rm=TRUE),
      school = mean(problem_school_toptwo, na.rm=TRUE),
      earthquake = mean(problem_earthquake_toptwo, na.rm=TRUE),
      #violent_crime = mean(problem_violent_crime_toptwo, na.rm=TRUE)
      crime = mean(problem_crime_toptwo, na.rm=TRUE)) %>% 
      pivot_longer( cols = c("construction", "water","flooding", "lighting", "trash",  "roads", "title",  "drug_use", "roads", "school", "earthquake","crime"), 
                    values_to = "mean", 
                    names_to = "issue")
    
    issue_means_top_two$issue <- factor(issue_means_top_two$issue,
                                        levels = c("construction", "water","flooding", "lighting", "trash",  "roads", "title",  "drug_use", "roads", "school", "earthquake", "crime"),
                                        labels = c("Construction",  "Water","Flooding", "Public \n Lighting", "Trash", "Roads", "Land \n Title",  "Drug Use", "Roads", "School/\n Education", "Earthquake", "Crime\n (Violent Crime, \n Robberies, \n Drug Dealers)"))
    pdf(file = "3_Output/Plots/summary/issues_toptwo.pdf", height = 4, width = 6)
    ggplot(issue_means_top_two) + 
      geom_bar(aes(x = reorder(issue, -mean), y = mean),stat= "identity")+
      theme_classic()+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12))+ 
      labs(title = "Issues Listed as a Top-Two Neighborhood Problem", x = "", y = "Percent of respondents",
           caption = "Survey responses from an online survey \n of Mexico City respondents, August 2021\n N = 1446")
    dev.off()

    ########################################################
    # Figure: What factors contributed to your organizing?
    ########################################################
    # Crime and noncrime version
    act_crime_factors <- dat %>% 
      filter(last_answer>76) %>% 
      filter(act_crime_any==1) %>% 
      select(response_id, act_crime_factors_nc, act_crime_factors_newsocial, act_crime_factors_social, act_crime_factors_otherissues, act_crime_factors_whatsapp) %>% 
      pivot_longer(cols=c(act_crime_factors_nc, act_crime_factors_newsocial, act_crime_factors_social, act_crime_factors_otherissues,act_crime_factors_whatsapp),
                   names_to="factor", values_to="yesno", names_prefix = "act_crime_factors_") %>% 
      filter(yesno==TRUE) %>% 
      group_by(factor) %>% 
      summarize(perc = n()/1379*100) %>% 
      mutate(type ="Around crime")
    
    act_noncrime_factors <- dat %>% 
      filter(last_answer>76) %>% 
      filter(act_noncrime_any==1) %>% 
      select(response_id, act_noncrime_factors_nc, act_noncrime_factors_newsocial, act_noncrime_factors_social, act_noncrime_factors_otherissues, act_noncrime_factors_whatsapp) %>% 
      pivot_longer(cols=c(act_noncrime_factors_nc, act_noncrime_factors_newsocial, act_noncrime_factors_social, act_noncrime_factors_otherissues,act_noncrime_factors_whatsapp),
                   names_to="factor", values_to="yesno", names_prefix = "act_noncrime_factors_") %>% 
      filter(yesno==TRUE) %>% 
      group_by(factor) %>% 
      summarize(perc = n()/1379*100)%>% 
      mutate(type ="Around non-crime issues")
    factors <- bind_rows(act_crime_factors, act_noncrime_factors)
    
    rm(list=setdiff(ls(), "dat"))
    
    # combining answers to either
    dat$factors_nc <-ifelse(dat$act_crime_factors_nc == 1| dat$act_noncrime_factors_nc==1,1,0)
    dat$factors_nc <-ifelse(is.na(dat$factors_nc), 0, dat$factors_nc)
    dat$factors_otherissues <-ifelse(dat$act_crime_factors_otherissues == 1| dat$act_noncrime_factors_otherissues==1,1,0)
    dat$factors_otherissues <-ifelse(is.na(dat$factors_otherissues), 0, dat$factors_otherissues)
    dat$factors_social <-ifelse(dat$act_crime_factors_social == 1| dat$act_noncrime_factors_social==1,1,0)
    dat$factors_social <-ifelse(is.na(dat$factors_social), 0, dat$factors_social)
    dat$factors_newsocial <-ifelse(dat$act_crime_factors_newsocial == 1| dat$act_noncrime_factors_newsocial==1,1,0)
    dat$factors_newsocial <-ifelse(is.na(dat$factors_newsocial), 0, dat$factors_newsocial)
    dat$factors_whatsapp <-ifelse(dat$act_crime_factors_whatsapp == 1| dat$act_noncrime_factors_whatsapp ==1,1,0)
    dat$factors_whatsapp <-ifelse(is.na(dat$factors_whatsapp), 0, dat$factors_whatsapp)
    
    
    factors_general <- dat %>% 
      filter(last_answer>76) %>% 
      filter(act_noncrime_any==1|act_crime_any ==0) %>% 
      select(response_id, factors_nc, factors_newsocial, factors_social, factors_otherissues, factors_whatsapp) %>% 
      pivot_longer(cols=c(factors_nc, factors_newsocial, factors_social, factors_otherissues,factors_whatsapp),
                   names_to="factor", values_to="yesno", names_prefix = "factors_") %>% 
      filter(yesno==TRUE) %>% 
      group_by(factor) %>% 
      summarize(perc = n()/1379*100)
    
    factors_general$factor <- factor(factors_general$factor, levels=c("nc","newsocial","otherissues","social", "whatsapp"),
                             labels=c("A neighborhood \n association" , "We had \n organized \n around  \n other \n issues", "Neighborhood \n WhatsApp \n or Facebook \n group","We knew  \n each \n other \n socially", "Met \n while \n organizing"))
    factors_general$highlight <- c("no", "yes", "no", "no", "yes")
    
    pdf(file="3_Output/Plots/summary/factors_general.pdf", width =8, height = 4)
    ggplot(data=factors_general) + 
      geom_bar(aes(x=reorder(factor,-perc), y=perc, fill = highlight),stat="identity",position="dodge") +
      theme_classic()+
      scale_fill_grey()+
      theme(legend.position="bottom")+
      labs(x="", y="Percent of (organizing) respondents", fill= "",title="Did any of the following factors \n facilitate your organizing?")+
      theme(axis.text=element_text(size=15), axis.title=element_text(size=15),title=element_text(size=20)) +
      theme(legend.position = "null")
    dev.off()

       
    rm(list=setdiff(ls(), "dat"))
    
    ########################################################
    # Figure: Uses of social media (among those in groups with neighbors)  
    ########################################################
    # Uses of social media (among those in groups with neighbors)  
    whatsapp_uses <- dat[dat$last_answer>50,] %>%
      select(response_id, wa_use_problems, wa_use_politics, wa_use_problems, wa_use_solutions, 
             wa_use_famfriend, wa_use_funny) %>% 
      pivot_longer(cols =c("wa_use_problems", "wa_use_politics", "wa_use_problems", "wa_use_solutions", 
                           "wa_use_famfriend", "wa_use_funny"), 
                   names_prefix = "wa_use_", 
                   values_to= "whatsapp_use", 
                   names_to = "whatsapp_use_type")
    n_wa <- table(dat[dat$last_answer>50,]$wa_neighbors)[2]
    
    whatsapp_uses <- whatsapp_uses %>% group_by(whatsapp_use_type) %>% 
      filter(whatsapp_use == TRUE) %>% tally() %>% mutate(media = "Whatsapp") %>% 
      rename(use_type = whatsapp_use_type) %>% 
      mutate(n_pct = n/n_wa*100)
    
    facebook_uses <- dat[dat$last_answer>54,] %>%
      select(response_id, fb_use_problems, fb_use_politics, fb_use_problems, fb_use_solutions, 
             fb_use_famfriend, fb_use_funny, fb_use_news, fb_use_buysell, fb_use_contact_gov) %>% 
      pivot_longer(cols =c("fb_use_problems", "fb_use_politics", "fb_use_problems", "fb_use_solutions", 
                           "fb_use_famfriend", "fb_use_funny", "fb_use_news", "fb_use_buysell", "fb_use_contact_gov"), 
                   names_prefix = "fb_use_", 
                   values_to= "facebook_use", 
                   names_to = "facebook_use_type")
    n_fb <- table(dat[dat$last_answer>54,]$fb_neighbors)[2]
    
    facebook_uses <- facebook_uses %>% group_by(facebook_use_type) %>% 
      filter(facebook_use == TRUE) %>% tally() %>% mutate(media = "Facebook") %>% 
      rename(use_type = facebook_use_type) %>% 
      mutate(n_pct =n/n_fb*100)
    
    social_use <- bind_rows(facebook_uses, whatsapp_uses)
    social_use$use_type <- factor(social_use$use_type, 
                                  levels = c("famfriend", "funny", "politics", "problems", "solutions", "news", "buysell", "contact_gov"),
                                  labels = c("Keep in \n touch \n with \n family \n and \n friends", 
                                             "Share \n funny \n photos or \n videos", 
                                             "Talk \n about \n politics",
                                             "Share \n info \n about \n neighborhood \n problems",
                                             "Organize \n solutions  to \n neighborhood \n problems",
                                             "Learn \n about \n the news", 
                                             "Buy and sell \n things", 
                                             "Contact \n government \n officials"))
    
    pdf(file = "3_Output/Plots/summary/social_use.pdf", height = 6, width = 9)
    ggplot(data= social_use) + 
      geom_bar(aes(x= use_type, y = n_pct, group= media, fill= media), position = "dodge", stat = "identity")+
      labs(x= "", y = "Percent", fill= "", title = "For what purpose do you use social \n media groups with your neighbors?",
           caption = "Percents calculated out of total respondents who reported being in groups with their neighbors")+
      geom_text(aes(x= use_type, y = n_pct + 5,group=media,  label = paste(round(n_pct,0), "%", sep = "")), position=position_dodge(width=1))+
      theme_classic()+
      theme(legend.position = "bottom") + 
      scale_fill_grey()
    dev.off()
    
    
###################################
# Analysis: Tables 
###################################
    #########################################################
    # Table: Correlation  between crime- and water-organizing
    #########################################################
    
    #M1: Fixed effects crime-water, includes whatsapp
    #M2: Fixed effects crime-water, excludes whatsapp
    #M3: No Fixed effects crime-noncrime, includes whatsapp
    #M4: Fixed effects crime-noncrime, includes whatsapp
    #M5: No Fixed effects crime-water, excludes whatsapp
    #M6: Fixed effects crime-water, excludes whatsapp
    
    # Because Stargazer doesn't recognize feglm models, we create lm shells to fill in with the results
    # https://stackoverflow.com/questions/52027811/formatted-latex-regression-tables-with-multiple-models-from-broom-output
    # create shell models for stargazer 
    m0 <- glm(act_water ~ act_crime_any-1, data= dat)
    m1 <- feglm(act_water ~ act_crime_any|cve_col|cve_col, dat)
    coef_m1 <- coef(summary(m1,type="clustered", cluster.vars = "cve_col"))[1,][1]
    names(coef_m1)[1] <- "act_crime_any"
    se_m1 <- coef(summary(m1,type="clustered", cluster.vars = "cve_col"))[,2]
    names(se_m1)[1] <- "act_crime_any"

    # Exclude WhatsApp group as an outcome- to do this, we make a copy of the dataset in which the action vars are defined without whatsapp
    datprime <- dat
    datprime$act_water_any <- ifelse(datprime$act_noncrime_water ==1& (datprime$act_noncrime_group ==1|
                                                                    datprime$act_noncrime_request ==1|
                                                                    datprime$act_noncrime_protest ==1),1,0)
    datprime$act_crime_any <- ifelse(datprime$act_crime_watch ==1|
                                           datprime$act_crime_request ==1|
                                           datprime$act_crime_protest ==1,1,0)
    
    m2 <- feglm(act_water_any ~ act_crime_any|cve_col|cve_col, datprime)
    coef_m2 <- summary(m2, type = "clustered", cluster.vars= "cve_col")
    names(coef_m2)[1] <- "act_crime_any"
    
    coef_m2 <- coef(summary(m2,type="clustered", cluster.vars = "cve_col"))[1,][1]
    names(coef_m2)[1] <- "act_crime_any"
    se_m2 <- coef(summary(m2,type="clustered", cluster.vars = "cve_col"))[,2]
    names(se_m2)[1] <- "act_crime_any"
    
    m00 <- glm(act_noncrime_any ~ act_crime_any-1, data= dat)
    
    m3 <- feglm(act_noncrime_any ~ act_crime_any|0|cve_col, dat)
    m4 <- feglm(act_noncrime_any ~ act_crime_any|cve_col|cve_col, dat)
    
    coef_m3 <- coef(summary(m3,type="clustered", cluster.vars = "cve_col"))[1,][1]
    coef_m4 <- coef(summary(m4,type="clustered", cluster.vars = "cve_col"))[1,][1]
    names(coef_m3)[1] <- "act_crime_any"
    names(coef_m4)[1] <- "act_crime_any"
    
    se_m3 <- coef(summary(m3,type="clustered", cluster.vars = "cve_col"))[,2]
    se_m4 <- coef(summary(m4,type="clustered", cluster.vars = "cve_col"))[,2]
    names(se_m3)[1] <- "act_crime_any"
    names(se_m4)[1] <- "act_crime_any"
    
    # Exclude WhatsApp group as an outcome
    m5 <- feglm(act_noncrime_any ~ act_crime_any|0|cve_col, datprime)
    m6 <- feglm(act_noncrime_any ~ act_crime_any|cve_col|cve_col, datprime)
    coef_m5 <- coef(summary(m5,type="clustered", cluster.vars = "cve_col"))[1,][1]
    coef_m6 <- coef(summary(m6,type="clustered", cluster.vars = "cve_col"))[1,][1]
    names(coef_m5)[1] <- "act_crime_any"
    names(coef_m6)[1] <- "act_crime_any"
    
    se_m5 <- coef(summary(m5,type="clustered", cluster.vars = "cve_col"))[,2]
    se_m6 <- coef(summary(m6,type="clustered", cluster.vars = "cve_col"))[,2]
    names(se_m5)[1] <- "act_crime_any"
    names(se_m6)[1] <- "act_crime_any"
    
    stargazer(m0,m0,m00,m00,m00,m00, coef = list(coef_m1,coef_m2, coef_m3, coef_m4, coef_m5, coef_m6),
                                      se = list(se_m1,se_m2, se_m3, se_m4, se_m5, se_m6),
              dep.var.caption = c("Engages in organizing"), 
              dep.var.labels = c("Water", "Any Noncrime"),
              covariate.labels = "History of crime organizing",
              add.lines = list(c("Colonia-level fixed effects", "Yes", "Yes", "No", "Yes", "No", "Yes"),
                               c("Observations", "602","533", "1,473", "798", "14,73", "798"),
                                c("Includes WhatsApp as organizing", "Yes", "No", "Yes", "Yes", "No", "No")),
              notes = c("Logit models", "Standard errors clustered at colonia level", "Number of observations varies across fixed-effects analyses", "based on number of respondents from each neighborhood"), 
              out = c("3_Output/Tables/main_spec.tex"),
              label = "tab:mainspec", type = "text",
              title = "Respondents who organize around crime are more likely to report engaging in neighborhood organizing around other issues",
              omit.table.layout = "s")


    
    rm(list=setdiff(ls(), "dat"))
    
    
    ########################################
    # Table: Earthquake first stage results 
    #########################################
    # First stage 
    stage1 <-  felm(act_noncrime_earthquake_5 ~ problem_earthquake|0|0|0, data= dat)
    stage1_cl <-  felm(act_noncrime_earthquake_5 ~ problem_earthquake|0|0|cve_col, data= dat)
    
    fstats <-paste("F statistic",  
                   round(summary(stage1)$F.fstat[1],2), 
                   round(summary(stage1_cl)$F.fstat[1],2),
                   sep = " & ")
    
    stargazer(stage1,stage1_cl,  type = "text", add.lines = list(fstats, c("Colonia clustered standard error", "No", "Yes")),
              digits= 2, 
              dep.var.labels = c("Taking Action (Earthquake)"),
              title = "First Stage Results: Neighbors affected by earthquake problems are more likely to organize earthquake-related action",
              covariate.labels = c("Earthquake as \\\\ neighborhood problem \\\\ (Self-reported)", "Constant"),
              notes = c("Linear probability models"),
              keep.stat = c("n", "rsq"), 
              out = "3_Output/Tables/shocks/earthquake/1s_linear_prob_just_self_report.tex",
              label = "tab:earthquake_1s")
    
    # For Appendix: Table with all three first stages
    stage1_zone <-  felm(act_noncrime_earthquake_5 ~ in_zone|0|0|0, data= dat)
    stage1_cl_zone <-  felm(act_noncrime_earthquake_5 ~ in_zone|0|0|cve_col, data= dat)
    stage1_risk <-  felm(act_noncrime_earthquake_5 ~ risk_level|0|0|0, data= dat)
    stage1_cl_risk <-  felm(act_noncrime_earthquake_5 ~ risk_level|0|0|cve_col, data= dat)
    
    
    fstats_all <-paste("F statistic",  
                       round(summary(stage1)$F.fstat[1],2), 
                       round(summary(stage1_cl)$F.fstat[1],2),
                       round(summary(stage1_zone)$F.fstat[1],2), 
                       round(summary(stage1_cl_zone)$F.fstat[1],2),
                       round(summary(stage1_risk)$F.fstat[1],2), 
                       round(summary(stage1_cl_risk)$F.fstat[1],2),
                       sep = " & ")
    
    stargazer(stage1,stage1_cl,
              stage1_zone, stage1_cl_zone,
              stage1_risk, stage1_cl_risk,
              type = "text", 
              add.lines = list(fstats,
                               c("Colonia clustered std. errors", "No", "Yes", "No", "Yes", "No", "Yes")),
              digits= 2, 
              dep.var.labels = c("Taking Action (Earthquake)"),
              title = "First Stage Results: Comparing Earthquake-Related Instruments",
              covariate.labels = c("Earthquake as \\\\ neighborhood problem \\\\ (Self-reported)", "In earthquake zone", "Risk atlas score", "Constant"),
              notes = c("Linear probability models"),
              keep.stat = c("n", "rsq"), 
              out = "3_Output/Tables/shocks/earthquake/1s_linear_prob_appendix.tex",
              label = "tab:earthquake_1s_appendix")
    rm(list=setdiff(ls(), "dat"))
    
    # Table: OLS and 2sls results for earthquake and crime
    dat$act_crime_after_2017 <- ifelse(dat$act_crime_any==1& 
                                         (dat$act_crime_earliest=="2-3 years ago (2018 or 2019)"|
                                            dat$act_crime_earliest=="Last year (2020)"|
                                            dat$act_crime_earliest=="This year (2021)"
                                         ),1,0)
    
    
    ols_1 <- felm(act_crime_after_2017 ~ act_noncrime_earthquake_5|0|0|cve_col, data = dat)
    tsls_1 <- felm(act_crime_after_2017 ~ 0|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|0, data = dat)
    tsls_1_cl <- felm(act_crime_after_2017 ~ 0|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|cve_col, data = dat)
    rf_1 <- felm(act_crime_after_2017 ~ problem_earthquake |0|0|0, data = dat)
    rf_1_cl <- felm(act_crime_after_2017 ~ problem_earthquake |0|0|cve_col, data = dat)
    
     
    # note that the relevant f stat (confirmed with Stata) is the first stage F stat from above
    
    stargazer(ols_1,tsls_1,tsls_1_cl,  type = "text", 
              column.labels = c("OLS",  "2sls"),
              column.separate = c(1, 2),
              dep.var.caption = "Taking Action (Crime)",
              dep.var.labels.include = FALSE,
              digits = 3,
              add.lines = list(c("Clustered std. errors & Yes & No & Yes")),
              keep.stat=c("n"), 
              covariate.labels = c("Taking action \\\\ (earthquake)", "\\textit{Instrumented} \\\\ Taking action \\\\ (earthquake)",  "Constant"), 
              title = "Residents who engaged in earthquake-related organizing are more likely to engage in crime-related organizing after 2017",
              out = "3_Output/Tables/shocks/earthquake/2sls_crime.tex",
              label= "tab:2sls_crime")
    
    # Table: OLS and 2sls results for earthquake and non-crime (non-earthquake)
    dat$act_noncrime_not_earthquake <- ifelse(dat$act_noncrime_construction==1|
                                                dat$act_noncrime_flooding==1|
                                                dat$act_noncrime_lighting==1|
                                                dat$act_noncrime_water==1|
                                                dat$act_noncrime_trash==1|
                                                dat$act_noncrime_school==1,1,0)
    dat$act_noncrime_not_earthquake <- ifelse(dat$act_noncrime_construction==1|
                                                dat$act_noncrime_flooding==1|
                                                dat$act_noncrime_lighting==1|
                                                dat$act_noncrime_water==1|
                                                dat$act_noncrime_trash==1|
                                                dat$act_noncrime_school==1,1,0)
    dat$act_noncrime_not_earthquake <- ifelse(is.na(dat$act_noncrime_not_earthquake),0, dat$act_noncrime_not_earthquake)
    ols_2 <- felm(act_noncrime_not_earthquake ~ act_noncrime_earthquake_5|0|0|cve_col, data = dat)
    tsls_2 <- felm(act_noncrime_not_earthquake ~ 0|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|0, data = dat)
    tsls_2_cl <- felm(act_noncrime_not_earthquake ~ 0|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|cve_col, data = dat)
    rf_2 <- felm(act_noncrime_not_earthquake ~ problem_earthquake |0|0|0, data = dat)
    rf_2_cl <- felm(act_noncrime_not_earthquake ~ problem_earthquake |0|0|cve_col, data = dat)
    
    
    stargazer(ols_2,tsls_2,tsls_2_cl, type = "text", 
              column.labels = c("OLS",  "2sls"),
              column.separate = c(1, 2),
              dep.var.caption = "Taking Action (Noncrime)",
              dep.var.labels.include = FALSE,
              digits = 3,
              add.lines = list(c("Clustered std. errors & Yes & No & Yes & No & Yes")),
              keep.stat=c("n"), 
              covariate.labels = c("Taking action \\\\ (earthquake)", "\\textit{Instrumented} \\\\ Taking action \\\\ (earthquake)", "Constant"), 
              title = "Residents who engaged in earthquake-related organizing are more likely to engage in non-crime-related organizing",
              out = "3_Output/Tables/shocks/earthquake/2sls_noncrime.tex",
              label= "tab:2sls_noncrime")
    rm(list=setdiff(ls(), "dat"))
    #####
    #  Appendix: Crime and Non-crime, controlling for SES
    #####
    # Crime outcomes
    stage1_ses <- felm(act_noncrime_earthquake_5 ~ indice_num + problem_earthquake|0|0|cve_col, data = dat)
    ols_ses_crime <- felm(act_crime_after_2017 ~ act_noncrime_earthquake_5 + indice_num|0|0|cve_col, data = dat)
    tsls_1_ses_crime <- felm(act_crime_after_2017  ~ indice_num|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|0, data = dat)
    tsls_1_cl_ses_crime <- felm(act_crime_after_2017 ~ indice_num|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|cve_col, data = dat)
    
    stargazer(stage1_ses, ols_ses_crime,tsls_1_ses_crime,tsls_1_cl_ses_crime,  type = "text", 
              column.labels = c("First stage", "OLS",  "2sls"),
              column.separate = c(1, 1, 2),
              dep.var.labels = c("Earthquake Response", "Crime-Related collective action"),
              digits = 3,
              add.lines = list( c(paste("First stage F & ",  round(summary(stage1_ses)$P.fstat["F"],2), " & & ", sep = "")),
                                c("Clustered std. errors & Yes & No & No & Yes & No & No & Yes ")),
              keep.stat=c("n"), 
              covariate.labels = c("Taking action \\\\ (earthquake)", "Neighborhood Socio-Economic Status", "\\textit{Instrumented} \\\\ Taking action \\\\ (earthquake)",  "Earthquake as \\\\ neighborhood problem \\\\ (Self-reported)", "Constant"), 
              title = "When we control for socioeconomic status in crime estimates, coefficients remain positive, but insignificant",
              out = "3_Output/Tables/shocks/earthquake/2sls_robustness_crime.tex",
              label= "tab:2sls_robustness_crime")
    
    # Non-crime outcomes
    stage1_ses <- felm(act_noncrime_earthquake_5 ~ indice_num + problem_earthquake|0|0|cve_col, data = dat)
    ols_ses <- felm(act_noncrime_not_earthquake ~ act_noncrime_earthquake_5 + indice_num|0|0|cve_col, data = dat)
    tsls_1_ses <- felm(act_noncrime_not_earthquake  ~ indice_num|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|0, data = dat)
    tsls_1_cl_ses <- felm(act_noncrime_not_earthquake ~ indice_num|0|(act_noncrime_earthquake_5 ~ problem_earthquake)|cve_col, data = dat)
    
    stargazer(stage1_ses, ols_ses,tsls_1_ses,tsls_1_cl_ses,  type = "text", 
              column.labels = c("First stage", "OLS",  "2sls"),
              column.separate = c(1, 1, 2),
              dep.var.labels = c("Earthquake Response", "Non-earthquake collective action"),
              digits = 3,
              add.lines = list( c(paste("First stage F & ",  round(summary(stage1_ses)$P.fstat["F"],2), " & & ", sep = "")),
                                c("Clustered std. errors & Yes & No & No & Yes & No & No & Yes ")),
              keep.stat=c("n"), 
              covariate.labels = c("Taking action \\\\ (earthquake)", "Neighborhood Socio-Economic Status", "\\textit{Instrumented} \\\\ Taking action \\\\ (earthquake)",  "Earthquake as \\\\ neighborhood problem \\\\ (Self-reported)", "Constant"), 
              title = "When we control for socioeconomic status in non-crime estimates, coefficients remain positive, but insignificant",
              out = "3_Output/Tables/shocks/earthquake/2sls_robustness_noncrime.tex",
              label= "tab:2sls_robustness_noncrime")
    
    # Appendix: Controlling for crime directly renders 2sls insignificant
    r_stage1 <- felm(act_noncrime_earthquake_5 ~ problem_earthquake + robberies_2016  |0|0|cve_col, data = dat)
    r_ols <- felm(act_crime_after_2017 ~ act_noncrime_earthquake_5 + robberies_2016  |0|0|cve_col, data = dat)
    r_tsls_1 <- felm(act_crime_after_2017 ~  robberies_2016|0|(act_noncrime_earthquake ~ problem_earthquake)|0, data = dat)
    r_tsls_1_cl <- felm(act_crime_after_2017 ~ +robberies_2016|0|(act_noncrime_earthquake ~ problem_earthquake)|cve_col, data = dat)
    
    stargazer(r_stage1, r_ols, r_tsls_1, r_tsls_1_cl, 
              type = "text",
              column.labels = c("First stage", "OLS",  "2sls"),
              column.separate = c(1,1, 2),
              dep.var.labels = c("Earthquake Response", "Post-2017 crime-related collective action"),
              digits = 3,
              add.lines = list( c(paste("First stage F & ",  round(summary(r_stage1)$P.fstat["F"],2), " & & ", sep = "")),
                                c("Clustered std. errors & Yes & Yes & No & Yes")),
              keep.stat=c("n"), 
              covariate.labels = c("Earthquake \\\\ as neighborhood \\\\ problem", "Taking action \\\\ (earthquake)", "2016 robberies", "\\textit{Instrumented} \\\\ taking action \\\\ (earthquake)", "Constant"), 
              title = "Controlling for 2016 crime rates renders the IV estimates insignificant",
              out = "3_Output/Tables/shocks/earthquake/2sls_robustness_crime_rates.tex",
              label= "tab:2sls_robustness_crime_rates")
    
   
    
    ###### Table: The correlation in crime and non-crime organizing is strongest when we look at the same form of organizing (bold coefficients). This suggests that once respondents have used one method of organizing, they tend to continue to invoke that form of organizing. 
    f1_nfe <- glm(act_noncrime_request ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat[dat$last_answer>76,], family = "binomial")
    f2_nfe <- glm(act_noncrime_protest ~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat[dat$last_answer>76,], family = "binomial")
    f3_nfe <- glm(act_noncrime_whatsapp~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat[dat$last_answer>76,], family = "binomial")
    f4_nfe <- glm(act_noncrime_group ~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat[dat$last_answer>76,], family = "binomial")
    
    stargazer(f1_nfe, f2_nfe, f3_nfe, f4_nfe, 
              column.labels = c("Request", "Protest", "WhatsApp Group", "Forming a Group"),
              covariate.labels = c("(Crime) Request", "(Crime) Protest", "(Crime) WhatsApp Group", "(Crime) Neighborhood Watch"),
              dep.var.caption = c("Non-Crime Organizing"),
              dep.var.labels.include = FALSE,
              notes= c("Logit Models", "Results with fixed effects and", "clustering can be found in the appendix"),
              title = "The correlation in crime and non-crime organizing is strongest when we look at the same form of organizing (bold coefficients). This suggests that once respondents have used one method of organizing, they tend to continue to invoke that form of organizing.", 
              column.sep.width = "-1em",
              type = "text",
              keep.stat= c("n"),
              out = "3_Output/Tables/no_fixed_effects/action_by_action_nfe.tex")    
    # note that we edit this table's latex table to bold the relevant coefficients

    ###### Appendix Table: The correlation in crime and non-crime organizing is strongest when we look at the same form of organizing (bold coefficients). This suggests that once respondents have used one method of organizing, they tend to continue to invoke that form of organizing. 
    # This is the above table with fixed effects and clustering
    # now test the individual type of organizing for both issues
    f1 <- feglm(act_noncrime_request ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch |cve_col|0|cve_col, data =dat)
    f2 <- feglm(act_noncrime_protest ~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch  |cve_col|0|cve_col, data =dat)
    f3 <- feglm(act_noncrime_whatsapp~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch  |cve_col|0|cve_col, data =dat)
    f4 <- feglm(act_noncrime_group ~ act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch  |cve_col|0|cve_col, data =dat)
    
    # extract coefficients 
    coef_f1 <- coef(summary(f1,type="clustered", cluster.vars = "cve_col"))[,1]
    coef_f2 <- coef(summary(f2,type="clustered", cluster.vars = "cve_col"))[,1]
    coef_f3 <-coef(summary(f3,type="clustered", cluster.vars = "cve_col"))[,1]
    coef_f4 <-coef(summary(f4,type="clustered", cluster.vars = "cve_col"))[,1]
    
    # extract se's
    se_f1 <- coef(summary(f1,type="clustered", cluster.vars = "cve_col"))[,2]
    se_f2 <- coef(summary(f2,type="clustered", cluster.vars = "cve_col"))[,2]
    se_f3 <-coef(summary(f3,type="clustered", cluster.vars = "cve_col"))[,2]
    se_f4 <-coef(summary(f4,type="clustered", cluster.vars = "cve_col"))[,2]
    
    # make shell 
    f01 <- lm(act_noncrime_request ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat)
    f02 <- lm(act_noncrime_protest ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat)
    f03 <- lm(act_noncrime_whatsapp ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat)
    f04 <- lm(act_noncrime_group ~  act_crime_request + act_crime_protest + act_crime_whatsapp + act_crime_watch, data =dat)
    
    stargazer(f01,f02,f03,f04, coef = list(coef_f1, coef_f2,coef_f3, coef_f4), 
              se = list(se_f1, se_f2, se_f3, se_f4),
              column.labels = c("Request", "Protest", "WhatsApp Group", "Forming a Group"),
              covariate.labels = c("Request", "Protest", "WhatsApp Group", "Neighborhood Watch"),
              dep.var.labels.include = FALSE,
              add.lines = list(c("Colonia-level fixed effects","Yes", "Yes", "Yes", "Yes"),
                               c("Colonia clustered standard errors", "Yes", "Yes", "Yes", "Yes"),
                               c("Observations", "436", "393", "502", "608")),
              title = "The correlation in crime and non-crime organizing is strongest when we look at the same form of organizing (bold coefficients). This suggests that once respondents have used one method of organizing, they tend to continue to invoke that form of organizing. This relationship holds when we included fixed effects and cluster standard errors, although the observation numbers drop significantly", 
              column.sep.width = "-1em",
              omit=c("Constant"),
              omit.table.layout = "sn",
              notes= c("Fixed Effects Logit models with standard errors clustered at colonia level"),
              type = "text",
              out = "3_Output/Tables/action_by_action.tex")
    rm(list=setdiff(ls(), "dat"))
    
    ####################################################################################
    # Table: We find positive correlations between experience organizing and the different forms of collective action infrastructure
    ####################################################################################
    # Connections
      # wa_neighbors
      # fb_neighbors
    # Knowledge
      # alcaldia_office
    # Beliefs
      # alcaldia_agree 
    # Organizations
      # organizations
    dat$act_noncrime_any_minus_wa <- ifelse(dat$act_noncrime_group ==1|
                                              dat$act_noncrime_request ==1|
                                              dat$act_noncrime_protest ==1,1,0)
    dat$act_crime_any_minus_wa <- ifelse(dat$act_crime_watch ==1|
                                           dat$act_crime_request ==1|
                                           dat$act_crime_protest ==1,1,0)
    dat$action <- ifelse(dat$act_crime_any==1|dat$act_noncrime_any==1,1,0)
    dat$action_notwhatsapp <- ifelse(dat$act_crime_any_minus_wa==1|dat$act_noncrime_any_minus_wa==1,1,0)
    
    dat$wa_neighbors_num <- ifelse(dat$wa_neighbors=="Yes",1,0)
    dat$wa_neighbors_num <- ifelse(is.na(dat$wa_neighbors_num)& dat$last_answer>40,0, dat$wa_neighbors_num)
    
    
    
    dat$fb_neighbors_num <- ifelse(dat$fb_neighbors == "Yes", 1,0)
    dat$fb_neighbors_num <- ifelse(is.na(dat$fb_neighbors_num)& dat$last_answer>40,0, dat$fb_neighbors_num)
    
    dat$alcaldia_office_binary <- ifelse(dat$alcaldia_office== "Yes and I know \n where it is/\nhow to contact them",1,0)
    dat$alcaldia_agree_num <- as.numeric(factor(dat$alcaldia_agree, 
                                                levels = c("None at all", "A little", "A moderate amount", "A great deal", "A lot")))
    
    dat$orgs_comm <- ifelse(is.na(dat$orgs_comm), 0, dat$orgs_comm)
     
    # Knowledge and beliefs
    k1 <- glm(alcaldia_office_binary ~ action_notwhatsapp, data = dat, family = "binomial")
    k2 <- lm(alcaldia_agree_num ~ action_notwhatsapp, data = dat)

    # organizations 
    o1 <- glm(orgs_comm ~ action_notwhatsapp, data = dat, family = "binomial")
   
     # Connections
    m1 <- glm(wa_neighbors_num ~ action_notwhatsapp, data = dat, family = "binomial")
    m2 <- glm(fb_neighbors_num ~ action_notwhatsapp, data = dat, family = "binomial")
    
    
    stargazer( k1,k2, o1,m1,m2, type = "text", 
              covariate.labels = "Organizing",
              dep.var.labels = c("Locates Alcaldia Office", "Knows how to contact alcaldia", "Community org member", "WhatsApp Group", "Facebook Group"),
              keep.stat = c("n"),
              add.lines = list(c("Clustered standard errors", "No", "No", "No", "No", "No")),
              digits=3,
              out = c("3_Output/Tables/infrastructure.tex"),
              title = "We find positive correlations between experience organizing and the different forms of collective action infrastructure") 
    
    rm(list=setdiff(ls(), "dat"))
    